# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627

# Bootstrap analysis of efficacy of the Pfizer-BioNTech COVID-19 Vaccine

 

# The Phase III clinical trial of Pfizer vaccine included n=44000 people,

# 50% took trt, 50% placebo.

 

# Result: 8 vaccinated and 162 placebo participants contracted COVID-19.

# A conclusion was made that vaccination reduces the risk of COVID by

# the factor of 20.

 

# Vaccine critics point to a small sample size of 170 for any meaningful

# conclusions. Here, we explore what confidence statements we can deduce

# from these data.

 

# Construct a Bootstrap CI for p1/p2 = the ratio of probabilities

# p1 = P( infection | placebo group ), p2 = P( infection | vaccine group )

 

V = c(rep(0,22000),rep(1,22000));        # 0 = placebo, 1 = treatment

X = c(rep(0,21838),rep(1,162),rep(0,21992),rep(1,8)); # 1 = contracted covid

XV = data.frame(X,V);

 

pratio = function(xv,Z){                # function of data xv and a subsample Z

  x = xv[,1]; v = xv[,2];

  Pplacebo = sum(x[Z]==1 & v[Z]==0)/sum(v[Z]==0);

  Pvaccine = sum(x[Z]==1 & v[Z]==1)/sum(v[Z]==1);

  return( Pplacebo/Pvaccine )

}

 

pratio(XV,)

 

# The ratio is estimated as 20.25. We need to supply it with the margin of error

# and a confidence level. That is, use Bootstrap to construct a confidence interval.

 

library(boot)                           

b = boot( XV, pratio, R=10000 )  # Apply bootstrap

quantile(b$t,c(0.025,0.975))                # This is a 95% confidence interval for p1/p2

 

#     2.5%    97.5%            # Thus, we can claim with 95% confidence that the ratio of

# 11.34329 54.51769     # probabilities is between 11.3 and 54.5. But do we need an interval,

                        # or should we just find a lower bound?

quantile(b$t,0.05)      # Try a one-sided confidence interval.

#       5%              # We can now claim, also with 95% confidence, that vaccination

# 12.29509              # reduces the chance of COVID-19 infection by at least a factor of 12.

 

################################################################################

### Official efficacy of the vaccine                                                                                                             ###

### The efficacy of a vaccine is defined as the difference of risks between                                       ###

### the vaccine group and the placebo group, relative to the placebo group.                                  ###

################################################################################

 

### To study the efficacy of Pfizer, we modify our function:

 

efficacy = function(xv,Z){ # function of data xv and a subsample Z

   x = xv[,1]; v = xv[,2];

   Pplacebo = sum(x[Z]==1 & v[Z]==0)/sum(v[Z]==0);

   Pvaccine = sum(x[Z]==1 & v[Z]==1)/sum(v[Z]==1);

   return( (Pplacebo - Pvaccine)/Pplacebo ) }

 

pratio(XV,)   # This gives the estimated efficacy which you see in all official reports

# 0.9506173   # But we know that it is only based on the sample, so it has a margin of error.

              # So, we proceed by building a confidence interval

 

b = boot( XV, efficacy, 10000 )                  # Bootstrap!

hist(b$t)     # This is a histogram of bootstrap efficacy values, it's not very symmetric

              # I'm just checking normality, to see if we can settle with a PARAMETRIC

              # bootstrap confidence interval, which is the mean b$t, plus or minus 1.96

              # standard deviations

 

shapiro.test(b$t)                           # Shapiro-Wilk test is a rigorous test of normality

Error in shapiro.test(b$t) : sample size must be between 3 and 5000

                      # The way it's built in R does not handle samples > 5000               

shapiro.test(b$t[sample(10000,5000)])   # Ok, we select a random sample of n=5000

 

#        Shapiro-Wilk normality test

#

# data:  b$t[sample(10000, 5000)]

# W = 0.98926, p-value < 2.2e-16        # Shapiro-Wilk test rejects normal distribution.

                                        # Hence, we construct a NONPARAMETRIC

quantile( b$t, c(0.025,0.975) )         # bootstrap confidence interval

#      2.5%     97.5%

# 0.9105982 0.9816955                   # This is a 95% confidence interval for efficacy

quantile( b$t, c(0.005,0.995) )

     0.5%     99.5%

0.8947269 0.9882617                     # This is a 99% confidence interval